## Estimation of a Multinomial Logit Model by Maximum Likelihood using mnlogit and Fixed point subroutine 
## with inds between 15-30 forming beliefs about same cohort occupational choices but taking inds >31's occupations as given
## by Jose-Alberto Guerra
##    Created   04/2014
##    Modified  17/06/2019
##    Uses: R version 3.1.2 (install old version of R, by holding down the Control key during the launch of RStudio you can use 64-bit 3.1.2 version)
#Proof2Rnew.dta comes from border_reg_weighted4v2.do, notice we have eliminated agricultural
#Proof2RCoordnew.dta is Proof2Rnew but adding the coordinates for objectid_1 points. It comes from TemporyPointsCoordinatesv1.do
#to deal with spatial data visualitaion in R see http://spatial.ly/r/
##Check lines below Robustness, they should be commented out for our basic specification
## Where we guarantee that KS probabilities are within [0,1]
## We guarantee inner loop (fixed point expectations) convergence using tol 10e-10 for the sup-norm stopping criterion
## We follow KS 2012 and KS 2018 using outer loop iterations k=50 and guaranteeing inner loop convergence (see above)
## agerange should be set as:
## 1. agerange <- 1530 If cohorts forming beliefs are between 15-30 years old
## 2. agerange <- 1545 If cohorts forming beliefs are between 15-45 years old
###########################################################################
rm(list=ls(all=TRUE))
#########################
####    Preamble     ####
#########################
##Functions
#codes <- "C:/Users/ja.guerra/Dropbox/NetworkParish2016/Occupational_Choice/Codes/RCodes/EEACodes/"
codes <- "C:/Users/ja.guerra/Dropbox/NetworkParish2016/Occupational_Choice/Submission/Final_ReStat/Codes/C_ANALYSIS/" #codes <- "C:/HPC/CODE/"
source(paste(codes,"Mlogit_NAPPHeteroFunctionsv2.R",sep=""))
###########################################################################
#1. DATA
###########################################################################
###########################################################################



###########################
###Heterogenous Beliefs (i.e. ``network'' interactions, correlated effects and fixed point beliefs subroutine)
###########################
rm(list=ls(all=TRUE))
set.seed(12345)
#root <- "S:/NetworkParish/HPC/"
root <- "C:/Users/ja.guerra/Dropbox/NetworkParish2016/Occupational_Choice/Submission/Final_ReStat/Codes/" #root <- "C:/HPC/"
setwd(paste(root,"Results/ReStat/",sep=""))
codes <- "C:/Users/ja.guerra/Dropbox/NetworkParish2016/Occupational_Choice/Submission/Final_ReStat/Codes/C_ANALYSIS/" #codes <- "C:/HPC/CODE/"
source(paste(codes,"Mlogit_NAPPHeteroFunctionsv2.R",sep=""))
AM <- 0 #if using Aguirregabiria and Mira==1, 0 if using Kasahara and Shimotsu
#if( AM==1 ){  rootsave <- paste(root,"Results/EEA/",sep="") } else   {rootsave <- paste(root,"Results/EEA/KS/",sep="")}
if( AM==1 ){  rootsave <- paste(root,"Results/ReStat/",sep="") } else   {rootsave <- paste(root,"Results/ReStat/KS/",sep="")}
#codes <- "C:/Users/ja.guerra/Dropbox/NetworkParish2016/Occupational_Choice/Codes/RCodes/EEACodes/" #codes <- "C:/HPC/CODE/"
#source(paste(codes,"Mlogit_NAPPHeteroFunctions.R",sep=""))
####Specify agerange <- 1530 
agerange <- "1545" #"1530"
sink(paste(rootsave,"NAPPEstimationHetenewagecohorts",agerange,".txt",sep=""))

#######
#Load Data 

# Identify those individuals age 15-30 who live in parishes of at least 30 other individuals in that cohort (HHage1530 inds and IDSocialage1530 socials)
load(file=paste(root,"NAPP/ReStat/pooleddataNAPPnewage",agerange,".RData",sep="" ))
HHage1530 <- unique(pooleddata$ID)
IDSocialage1530 <- unique(pooleddata$IDSocial)

#Calling Data see Mlogit_NAPPHeterov3.R
#load(file=paste(root,"NAPP/ReStat/PooledNAPPIDnewwcontinuous.RData",sep="" )) #PooledNAPPID
#load(file=paste(root,"NAPP/ReStat/NAPPXlistnewwcontinuous.RData",sep="" )) #NAPPXlist
#load(file=paste(root,"NAPP/ReStat/coordslistNAPPnewwcontinuous.RData",sep="" )) #coordslist

load(file=paste(root,"NAPP/ReStat/pooleddataNAPPneww50list.RData",sep="" )) # pooleddata, Data per parish in long format (sw) pooled together
IDSocialall <- unique(pooleddata$IDSocial)
#Identify position and ID of parishes not included in the wage1530 exercise
PosNIIDSocial <- which(!(IDSocialall %in% IDSocialage1530))
PosIIDSocial <- which(IDSocialall %in% IDSocialage1530)
NIIDSocial <- IDSocialall[PosNIIDSocial]
load(file=paste(root,"NAPP/ReStat/pooleddataNAPPhomoneww50list.RData",sep="" )) # pooleddatahomo, Data per parish in long format (sw) pooled together for hmogenous
load(file=paste(root,"NAPP/ReStat/pooleddatalistNAPPneww50list.RData",sep="" )) #pooleddatalist, Data per parish in wide format (sw.1,sw.2,...) pooled together
load(file=paste(root,"NAPP/ReStat/pooleddatalistNAPPhomoneww50list.RData",sep="" )) #pooleddatalisthomo, Data per parish in wide format (sw.1,sw.2,...) pooled together
load(file=paste(root,"NAPP/ReStat/datalistNAPPneww50list.RData",sep="" )) #datalist, Lists of data per parish in wide format (sw.1,sw.2,...)
load(file=paste(root,"NAPP/ReStat/coordslistNAPPneww50list.RData",sep="" )) #coordslist, Lists of coordinates per parish
load(file=paste(root,"NAPP/ReStat/dneighboursNAPPneww50list.RData",sep="")) #dneighbours, Lists of neigbours identities per parish
load(file=paste(root,"NAPP/ReStat/wlistNAPPneww50list.RData",sep="")) #wlist, Sparse Lists of neighbours per parish

#Restricting data to those individuals age1530 and parishes that have at least 30 individuals from that cohort
pooleddata <- pooleddata[!(pooleddata$IDSocial %in% NIIDSocial),]
pooleddatahomo <- pooleddatahomo[!(pooleddatahomo$IDSocial %in% NIIDSocial),]
pooleddatalist <- pooleddatalist[!(pooleddatalist$IDSocial %in% NIIDSocial),]
pooleddatalisthomo <- pooleddatalisthomo[!(pooleddatalisthomo$IDSocial %in% NIIDSocial),]
datalist <- datalist[(PosIIDSocial)]
coordslist <- coordslist[(PosIIDSocial)]
dneighbours <- dneighbours[(PosIIDSocial)]
wlist <- wlist[(PosIIDSocial)]

#To save data paste(root,"Mola Data/",sep="")
save(pooleddata, file=paste(root,"NAPP/ReStat/pooleddataNAPPnewagecohorts",agerange,".RData",sep="" )) #Data per parish in long format (sw) pooled together
save(pooleddatahomo, file=paste(root,"NAPP/ReStat/pooleddataNAPPhomonewagecohorts",agerange,".RData",sep="" )) #Data per parish in long format (sw) pooled together for homogeneous beliefs
save(pooleddatalist, file=paste(root,"NAPP/ReStat/pooleddatalistNAPPnewagecohorts",agerange,".RData",sep="" )) #Data per parish in wide format (sw.1,sw.2,...) pooled together
save(pooleddatalisthomo, file=paste(root,"NAPP/ReStat/pooleddatalistNAPPhomonewagecohorts",agerange,".RData",sep="" )) #Data per parish in wide format (sw.1,sw.2,...) pooled together for the homogeneous case
save(datalist, file=paste(root,"NAPP/ReStat/datalistNAPPnewagecohorts",agerange,".RData",sep="" )) #Lists of data per parish in wide format (sw.1,sw.2,...)
save(coordslist, file=paste(root,"NAPP/ReStat/coordslistNAPPnewagecohorts",agerange,".RData",sep="" )) #Lists of coordinates per parish
save(dneighbours, file=paste(root,"NAPP/ReStat/dneighboursNAPPnewagecohorts",agerange,".RData",sep="")) #Lists of neigbours identities per parish
save(wlist, file=paste(root,"NAPP/ReStat/wlistNAPPnewagecohorts",agerange,".RData",sep="")) #Sparse Lists of neighbours per parish

options(max.print=1000000)
gc()
print("Heterogeneous  beliefs: ``network'' interactions, correlated effects and fixed point beliefs subroutine")

#define which variables we include in the estimation
xvars <- c("AGE","married",  "nchild",   "nservant", "stayerp") #, "stayerc" "SEX",
wvar <- "sw"

pooleddataage1530 <- pooleddata[pooleddata$ID %in% HHage1530,]
## Naive Estimation Heterogeneous
###################
print("Naive Estimation")
gc()
#1. x_i  
fformula <-mFormula(as.formula(paste("choice ~ 1 | ", paste(xvars, collapse= "+"),"|", wvar ,sep="")))          
ml.w <- mnlogit(fformula,pooleddataage1530,"alt")
save(ml.w,file=paste("Mlwnewagecohorts",agerange,".RData",sep=""))
print("w, x_i")
print(summary(ml.w))
#2. x_i, x_p
##w
fformula <-mFormula(as.formula(paste("choice ~ 1 | ", paste(paste(xvars, collapse= "+"),"+",paste(xvars, "w",collapse= "+",sep="")),"|", wvar ,sep="")))          
ml.om.w <- mnlogit(fformula, pooleddataage1530,"alt")
save(ml.om.w,file=paste("Mlomwnewagecohorts",agerange,".RData",sep=""))
print("w, x_i x_p")
print(summary(ml.om.w))
#3. x_i, x_p, t_b
##w
fformula <-mFormula(as.formula(paste("choice ~ 1 | ", paste(paste(xvars, collapse= "+"),"+",paste(xvars, "w",collapse= "+",sep=""),"+","factor(IDCivil)"),"|", wvar ,sep="")))          
ml.m.w <- mnlogit(fformula, pooleddataage1530,"alt")
save(ml.m.w,file=paste("Mlmwnewagecohorts",agerange,".RData",sep=""))
print("w, x_i x_p t_b")
print(summary(ml.m.w))

#4. x_i, x_p, t_s
##w
fformula <-mFormula(as.formula(paste("choice ~ 1 | ", paste(paste(xvars, collapse= "+"),"+",paste(xvars, "w",collapse= "+",sep=""),"+","factor(IDSocial)"),"|", wvar ,sep=""))) 
ml.mt.w <- mnlogit(fformula, pooleddataage1530,"alt")
save(ml.mt.w,file=paste("Mlmtwnewagecohorts",agerange,".RData",sep=""))
#0 When robustness we have to change the ending of the file to accomodate one of the following
# #wexernei COULD BE atleast1 w100list nmovers age1530 w50list atmost10
#1. end of robust
print("w, x_i x_p t_p")
print(summary(ml.mt.w))
sink()

## Nested Pseudo likelihood Estimation Heterogeneous
###################

source(paste(codes,"Mlogit_NAPPHeterofformula1v2agecohortsbeliefs.R",sep=""))